package com.zhucx.core;

import com.zhucx.ZhuCrawler;
import com.zhucx.parser.PageParser;
import org.apache.commons.lang3.ObjectUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Convenience entry point that runs a {@link ZhuCrawler} over a set of URLs and
 * collects the HTML of the parsed pages into one jsoup {@link Document}.
 */
public class SpiderMain {

    /**
     * Main crawl entry point: crawls the given URLs and merges the HTML of every
     * parsed page into a single document.
     *
     * <p>The result document is created up front as an empty HTML shell, and the
     * HTML of each page handed to the parser callback is appended to its body.
     * Previously the accumulator was {@code null}, so the callback failed with a
     * {@link NullPointerException} and the method could only return {@code null}.
     *
     * @param url
     *            addresses to crawl (forwarded to {@code setUrls})
     * @param saveDiv
     *            elements to keep (forwarded to {@code setSelectElement})
     * @param deleteDiv
     *            elements to delete (forwarded to {@code setDeleteElement})
     * @return a non-null document whose body contains the HTML of every page that
     *         was passed to the parser; the body is empty if no page was parsed
     */
    public static Document getPageDocument(String[] url, String saveDiv, String deleteDiv) {
        // Accumulator for all crawled pages. It must be a real (non-null) document
        // because the parser callback below appends to it.
        final Document pageDoc = Document.createShell("");

        ZhuCrawler zhuCrawler = new ZhuCrawler.Builder()
                .setUrls(url)
                .setThreadCount(3)
//                .setPageLoader(new PlaywrightPageLoader())
                .setSelectElement(saveDiv)
                .setDeleteElement(deleteDiv)
                .setPageParser(new PageParser<Object>() {
                    @Override
                    public void parse(Document doc, Element pageVoElement) {
                        if (ObjectUtils.isNotEmpty(doc)) {
                            // The crawler is configured with several threads, so this
                            // callback may run concurrently; jsoup documents are not
                            // thread-safe, hence the lock around the mutation.
                            synchronized (pageDoc) {
                                pageDoc.body().append(doc.html());
                            }
                        }
                    }

                })
                .build();

        System.out.println("start");
        // NOTE(review): presumably start(true) blocks until the crawl has finished,
        // so pageDoc is complete when returned - confirm against ZhuCrawler.
        zhuCrawler.start(true);
        System.out.println("end");
        return pageDoc;
    }
}
